<?php

// Root directory of the installation: the parent of this file's directory,
// unless the including script has already defined MOO_HOME itself.
defined('MOO_HOME') or define('MOO_HOME', dirname(__FILE__) . '/..');

require MOO_HOME . '/sql/Language.php';
require MOO_HOME . '/sql/ExecPlan.php';
require MOO_HOME . '/sql/Parser.php';
require MOO_HOME . '/DataInterface.php';

/**
 * SQL lexer.
 *
 * Turns an SQL text into a list of tokens in three steps:
 *   1. strip()    - removes comments and replaces every literal (number or
 *                   string) with a numeric placeholder, remembering the
 *                   literal value in $bindValues;
 *   2. tokenize() - splits the stripped text into [token id, token text] pairs;
 *   3. hash()     - computes an MD5 hash of the token list.
 *
 * The hash is used as the key of the execution plan, so two queries that
 * differ only in their literal values share one plan.
 */
final class MooLexer {
	protected $sql = '';
	protected $token_regexp = '';
	protected $bindValues = array();
	protected $tokens = array();
	protected $hash = '';
	protected $plans = array();
	protected $parseTree = null; // result of the last MooParser::createParseTree() call
	protected $Parser = null; // MooParser reference
	protected $Optimizer = null; // MooOptimizer reference
	protected $db = null; // MooDataInterface reference
	
	// The values of TOKEN_WHITESPACE .. TOKEN_ILLEGAL equal the number of the
	// capturing group of $token_regexp that recognises the token (0 = no group).
	const TOKEN_WHITESPACE	= 0;
	const TOKEN_WORD		= 1;
	const TOKEN_QUOTED_ID	= 2;
	const TOKEN_PLACEHOLDER	= 3;
	const TOKEN_MULTICHAR	= 4;
	const TOKEN_SINGLECHAR	= 5;
	const TOKEN_ILLEGAL		= 6;
	// These two are never produced by the regexp; tokenize() derives them.
	const TOKEN_RESERVED	= 7;
	const TOKEN_IDENTIFIER	= 8;
	
	const PLAN_PREFIX = 'MooExecPlan_';
	
	// Modifiers: x = extended syntax (whitespace and # comments are ignored),
	//            s = dot matches newline (multi-line comments, escaped newlines),
	//            S = study the pattern.
	// NOTE: the pattern delimiter must not appear unescaped anywhere in the
	// pattern, including its inline comments.
	const REGEXP_STRIP =
		'/#  # replaceable SQL elements
			 #
			 # 1. numeric literals
			(
				\\b(?:
					\\d+\\.\\d*				# 12.3, 123.
				|	\\d*\\.\\d+				# .123
				|	\\d+					# 123
				)(?:[eE][+-]?\\d+)?			# E123, e+123, E-123
				\\b
			)
				|
			 # 2. comments
			(
				(?:--[^\\n]*)				# single-line
			|	(?:\\/\\*.*?\\*\\/)				# multi-line
			)
				|
			 # 3. quoted strings
			(
				# A backslash always escapes the next character, so it is
				# excluded from the plain-character class.
				\'(?:[^\'\\\\]|\'\'|\\\\.)*\'
			|	"(?:[^"\\\\]|""|\\\\.)*"
				# There is no support for MySQL-like "character set introducers".
			)
				|
			 # 4. quoted identifiers
			(`[^`]+`)
		/xsS';
	
	/**
	 * Loads the previously generated execution plans and builds the
	 * tokenizer regular expression.
	 *
	 * @param MooDataInterface $db data interface the plans are bound to
	 * @throws Exception if $db is not a MooDataInterface
	 */
	public function __construct($db) {
		if (!($db instanceof MooDataInterface)) {
			throw new Exception('DB must be specified.');
		}
		$this->db = $db;
		// Include the generated execution plans. Every plan file is expected
		// to return a plan object; the 32 characters that follow the prefix
		// in the file name are used as the plan key (the query hash).
		$glob_pattern = $this->db->getDatabaseDirectory() . '/plans/' . self::PLAN_PREFIX;
		$glob_pat_len = strlen($glob_pattern);
		$plan_files = glob($glob_pattern . '*.php');
		if ($plan_files === false) {
			// glob() reports an error with false, which cannot be iterated.
			$plan_files = array();
		}
		foreach ($plan_files as $_f) {
			$_h = substr($_f, $glob_pat_len, 32);
			$this->plans[$_h] = include($_f);
			$this->plans[$_h]->setDB($this->db);
		}
		
		// Generate regular expressions based on the token lists in MooLang
		// (the original note says they are auto-generated by the BNF parser).
		$schar_tokens = '';
		foreach (MooLang::$c1 as $t) {
			$schar_tokens .= preg_quote($t, '!');
		}
		$schar_tokens = '[' . $schar_tokens . ']';
		$mchar_list = array();
		foreach (MooLang::$cx as $t) {
			$mchar_list[] = preg_quote($t, '!');
		}
		$mchar_tokens = implode('|', $mchar_list);
		$this->token_regexp =
		'!#
		  # 0. whitespace
			\\s+
			|
		  # 1. words
		 (\\b[a-zA-Z_][a-zA-Z_0-9]*\\b)
			|
		  # 2. quoted identifiers
		 (\\`[^\\`]+\\`)
			|
		  # 3. placeholders for literals (0, 1, 2, ...)
		 (\\b\\d+\\b)
			|
		  # 4. multi-character elements (such as >= or =<)
		 (' . $mchar_tokens . ')
			|
		  # 5. single-character elements
		 (' . $schar_tokens . ')
			|
		  # 6. illegal characters
		 (.+?)
		!x';
	}
	
	public function __destruct() {
		$this->db = null;
	}
	
	/**
	 * Removes comments from $this->sql and replaces every numeric or string
	 * literal with a placeholder (' 0 ', ' 1 ', ...). The literal values are
	 * stored in $this->bindValues under the placeholder number.
	 *
	 * Matches are processed from the end of the text towards the start so
	 * that the recorded offsets stay valid while the text is being edited;
	 * consequently the LAST literal of the query gets placeholder 0.
	 *
	 * @throws Exception if the regular expression engine fails
	 */
	protected function strip() {
		// Forget the values bound for the previous query.
		$this->bindValues = array();
		$found = preg_match_all(self::REGEXP_STRIP, $this->sql, $matches, PREG_OFFSET_CAPTURE);
		if ($found === false) {
			throw new Exception('Unable to strip literals (PCRE error ' . preg_last_error() . ").\n");
		}
		if (!$found) {
			return;
		}
		$i = 0;
		foreach (array_reverse($matches[0]) as $m) {
			$val = $m[0];
			$lenV = strlen($val);
			$ofsV = $m[1];
			$first = $val[0];
			if ($first === '/' || $first === '-') {
				// It's a comment. Replace it with a space rather than with
				// nothing, so that the neighbouring tokens do not merge.
				$this->sql = substr_replace($this->sql, ' ', $ofsV, $lenV);
				continue;
			}
			if ($first === '`') {
				// Leave quoted identifiers intact.
				continue;
			}
			if ($first === '"' || $first === "'") {
				// Process all special sequences inside a string in ONE pass:
				// strtr() never rescans its own output, so an escaped
				// backslash followed by a letter (\\n) stays a backslash
				// and a letter. A doubled quote stands for a single quote.
				// The escape list follows MySQL.
				$unescape = array(
					$first . $first => $first,
					'\\0'  => "\0",
					"\\'"  => "'",
					'\\"'  => '"',
					'\\b'  => chr(8),
					'\\n'  => "\n",
					'\\r'  => "\r",
					'\\t'  => "\t",
					'\\Z'  => chr(26),
					'\\\\' => '\\',
					'\\%'  => '%',
					'\\_'  => '_'
				);
				$val = strtr((string) substr($val, 1, $lenV - 2), $unescape);
			}
			// Bind the value and substitute it with a placeholder.
			$this->bindValues[$i] = $val;
			$this->sql = substr_replace($this->sql, ' ' . $i . ' ', $ofsV, $lenV);
			$i++;
		}
	}
	
	/**
	 * Splits the stripped SQL into tokens and stores them in $this->tokens
	 * as array(token id, token text) pairs. Whitespace is dropped, words
	 * become TOKEN_RESERVED (upper-cased) or TOKEN_IDENTIFIER, and quoted
	 * identifiers become TOKEN_IDENTIFIER without their backticks.
	 *
	 * @throws Exception on an illegal token or a regular expression failure
	 */
	protected function tokenize() {
		$this->tokens = array();
		$found = preg_match_all($this->token_regexp, $this->sql, $matches, PREG_SET_ORDER);
		if ($found === false) {
			throw new Exception('Unable to tokenize (PCRE error ' . preg_last_error() . ").\n");
		}
		if (!$found) {
			return;
		}
		foreach ($matches as $m) {
			// Trailing unmatched groups are not reported, so the index of
			// the last element is the number of the group that matched.
			$token_id = count($m) - 1;
			$token = $m[0];
			if ($token_id === self::TOKEN_ILLEGAL) {
				throw new Exception('Illegal token: "' . $token . "\".\n");
			}
			if ($token_id === self::TOKEN_WHITESPACE) {
				continue;
			}
			if ($token_id === self::TOKEN_WORD) {
				$upper = strtoupper($token);
				if (in_array($upper, MooLang::$r)) {
					$token_id = self::TOKEN_RESERVED;
					$token = $upper;
				} else {
					$token_id = self::TOKEN_IDENTIFIER;
				}
			} elseif ($token_id === self::TOKEN_QUOTED_ID) {
				$token_id = self::TOKEN_IDENTIFIER;
				$token = substr($token, 1, strlen($token) - 2);
			}
			$this->tokens[] = array($token_id, $token);
		}
	}
	
	/**
	 * Computes the hash of the current token list. The exact algorithm must
	 * not change: the hash is the key under which plans are looked up.
	 */
	protected function hash() {
		$this->hash = md5(var_export($this->tokens, true));
	}
	
	/**
	 * Returns the parser, creating it on first use.
	 *
	 * @return MooParser
	 */
	protected function getParser() {
		// lazy Parser creation
		if (!isset($this->Parser)) {
			// The class may already be loaded; including its file a second
			// time would be a fatal "cannot redeclare" error.
			if (!class_exists('MooParser', false)) {
				require_once 'Parser.php';
			}
			$this->Parser = new MooParser($this->db);
		}
		return $this->Parser;
	}
	
	/**
	 * Returns the optimizer (created on first use), primed with the tokens
	 * and the hash of the current query.
	 *
	 * @return MooSimpleOptimizer
	 */
	protected function getOptimizer() {
		// lazy Optimizer creation
		if (!isset($this->Optimizer)) {
			// Guarded for the same reason as in getParser(): another lexer
			// instance may have loaded the class already.
			if (!class_exists('MooSimpleOptimizer', false)) {
				require_once 'SimpleOptimizer.php';
			}
			$this->Optimizer = new MooSimpleOptimizer($this->db);
		}
		$this->Optimizer->setQuery($this->tokens, $this->hash);
		return $this->Optimizer;
	}
	
	/**
	 * Returns the execution plan for the given SQL text, parsing the query
	 * and creating the plan if no plan class exists for its hash yet.
	 *
	 * @param string $sql SQL text
	 * @return mixed the plan stored under the query hash
	 * @throws Exception if the SQL text cannot be tokenized
	 */
	public function getPlan($sql) {
		$this->sql = $sql;
		$this->strip();
		$this->tokenize();
		$this->hash();
		// lazy parse and plan creation
		if (!class_exists(self::PLAN_PREFIX . $this->hash)) {
			$this->parseTree = $this->getParser()->createParseTree($this->tokens);
			$this->plans[$this->hash] = $this->getOptimizer()->createPlan($this->parseTree);
		}
		return $this->plans[$this->hash];
	}
	
	/**
	 * Debugging aid: runs getPlan() on the given SQL and prints the included
	 * files, the known plans, the tokenizer regexp and every intermediate
	 * result. Exceptions are printed instead of being propagated.
	 *
	 * @param string $sql SQL text
	 */
	public function __test($sql) {
		echo "============ FILES : \n\t" . implode("\n\t", get_included_files()) . "\n\n";
		echo "============ PLANS :\n\t" . implode("\n\t", array_keys($this->plans)) . "\n\n";
		echo "============ TOKEN REGEXP :\n" . $this->token_regexp . "\n\n";
		echo "============ INITIAL SQL :\n" . $sql . "\n\n";
		try {
			$this->getPlan($sql);
		} catch (Exception $e) {
			echo get_class($e) . ": " . $e->getMessage() . $e->getTraceAsString() . "\n\n";
		}
		echo "============ STRIPPED SQL :\n" . $this->sql . "\n\n";
		echo "============ BIND VALUES :\n" . var_export($this->bindValues, true) . "\n\n";
		echo "============ TOKENS :\n";
		foreach ($this->tokens as $t) {
			echo '[' . $t[0] . ':' . $t[1] . '] ';
		}
		echo "\n\n";
		echo "============ HASH :\n" . $this->hash . "\n\n";
	}
}

// Command-line self-test: runs only when this file is the script being
// executed (the first entry of get_included_files()), not when it is
// included by another script.
// array_shift() takes its argument by reference, so the list is stored in a
// variable and its first element is read directly.
$moo_included_files = get_included_files();
if ($moo_included_files[0] === __FILE__) {
	if (isset($argv[1])) {
		$yp = new MooLexer(new MooDataInterface(MOO_HOME . '/test'));
		$yp->__test($argv[1]);
	} else {
		echo "Usage: php " . basename(__FILE__) . " <sql text>\n";
	}
}

?>
